import pandas as pd
import gene_exp_10x
import matplotlib.pyplot as plt
%matplotlib inline
from copy import deepcopy
from clustergrammer2 import net
def umi_norm(df):
    """Return *df* with every column divided by that column's total.

    The column totals come from ``df.sum()``; the division is aligned on
    column labels, so each column of the result sums to 1 (columns whose
    total is 0 yield NaN/inf, as with any pandas division by zero).
    Presumably columns are cell barcodes and values are UMI counts -- the
    function itself only relies on the data being numeric.

    The input frame is not modified; a new frame is returned.
    """
    totals_per_column = df.sum()
    return df / totals_per_column
import random
def subsample_cats(df, cat_index, num_samples, sample_type='subsample', random_state=99):
    """Limit every column category of *df* to at most *num_samples* columns.

    Each column label must be indexable (e.g. a tuple); the category of a
    column is ``label[cat_index]``. Categories are processed in sorted
    order and the per-category results are placed side by side.

    Args:
        df: DataFrame whose column labels carry a category entry.
        cat_index: Position of the category inside each column label.
        num_samples: Maximum number of columns to keep per category.
        sample_type: ``'subsample'`` keeps a random selection of the
            existing columns; ``'downsample'`` delegates to
            ``net.downsample`` with ``ds_type='kmeans'``, asking for
            ``min(num_samples, columns in category)`` columns.
        random_state: Seed. For ``'subsample'`` it seeds a private random
            generator, so the selection is reproducible and independent of
            the global ``random`` state; for ``'downsample'`` it is passed
            through to ``net.downsample``.

    Returns:
        A DataFrame with the selected (or downsampled) columns.

    Raises:
        ValueError: If *sample_type* is not one of the supported values.
    """
    if sample_type not in ('subsample', 'downsample'):
        raise ValueError(
            "sample_type must be 'subsample' or 'downsample', got %r" % (sample_type,)
        )

    # Group the column labels by category in one pass, preserving the
    # original column order within each category.
    cols_by_cat = {}
    for col in df.columns.tolist():
        cols_by_cat.setdefault(col[cat_index], []).append(col)
    cats = sorted(cols_by_cat)

    if sample_type == 'subsample':
        # A dedicated generator makes random_state effective here; the
        # module-level random.sample would ignore it.
        rng = random.Random(random_state)
        keep_cols = []
        for inst_cat in cats:
            keep_cat_cols = cols_by_cat[inst_cat]
            if len(keep_cat_cols) > num_samples:
                keep_cat_cols = rng.sample(keep_cat_cols, num_samples)
            keep_cols.extend(keep_cat_cols)
        return df[keep_cols]

    # sample_type == 'downsample'
    # NOTE(review): this goes through the shared module-level `net` object,
    # so whatever it had loaded before is presumably replaced -- confirm
    # that callers reload their data afterwards.
    df_list = []
    for inst_cat in cats:
        df_cat = df[cols_by_cat[inst_cat]]
        # Small categories are still routed through net.downsample, with
        # as many output columns as the category has.
        n_keep = min(num_samples, df_cat.shape[1])
        net.load_df(df_cat)
        net.downsample(axis='col', ds_type='kmeans', num_samples=n_keep, random_state=random_state)
        df_list.append(net.export_df())
    return pd.concat(df_list, axis=1)
# --- Load the per-cell metadata table and the gene-expression matrix ------
# Bare expressions below (``.shape``, ``.tolist()``) are notebook-style
# inspection lines; they only display something when run as notebook cells.
df_meta = pd.read_csv('../download/atlas/meta.tab', sep='\t', index_col=0)
df_meta.shape
df_ini = gene_exp_10x.load_gene_exp_to_df('../download/atlas/atlas_data_5k-sum/')
df_ini.shape
df_ini.columns.tolist()[:3]
df_meta.columns.tolist()

# --- Inspect the doublet annotation ---------------------------------------
ser_doublet = df_meta['doublet']
# Element-wise comparison on purpose (not `is False`): it builds a mask.
# NOTE(review): ser_singlet is only printed here; the expression matrix is
# not filtered by it in the visible code -- confirm that is intended.
ser_singlet = ser_doublet[ser_doublet == False]  # noqa: E712
print(ser_doublet.shape)
print(ser_singlet.shape)

# --- Cell-type distribution -----------------------------------------------
ser_ct = df_meta['celltype']
# `kind` must be passed by keyword: recent pandas rejects positional
# arguments to Series.plot.
ser_ct.value_counts().sort_values(ascending=False).plot(kind='bar', figsize=(15, 5))

# --- Attach the cell type to every column label ---------------------------
# Each column label becomes a (original label, 'Cell Type: <type>') tuple;
# the lookup uses the original label as key into the metadata series.
cols = df_ini.columns.tolist()
new_cols = [(x, 'Cell Type: ' + str(ser_ct[x])) for x in cols]
print(new_cols[0])
# Work on a copy so the relabelling leaves df_ini untouched.
df_cat = df_ini.copy()
df_cat.columns = new_cols
df_cat.columns.tolist()[:5]

# --- Reduce to at most 250 columns per cell type, then normalise ----------
# Alternative kept for reference: take the first N columns instead.
# num_cells = 10000
# df_small = umi_norm(df_cat.iloc[:,:num_cells])
# df_small.shape
df_small = subsample_cats(df_cat, cat_index=1, num_samples=250, sample_type='subsample', random_state=99)
df_small_umi = umi_norm(df_small)
df_small_umi.shape

# --- Build the clustergrammer view ----------------------------------------
# Keep the 250 rows ranked highest by variance, z-score each row, clip to
# [-5, 5], and round to 2 decimals before handing the data to the widget.
net.load_df(df_small_umi)
net.filter_N_top(inst_rc='row', N_top=250, rank_type='var')
net.normalize(axis='row', norm_type='zscore')
net.clip(-5, 5)
net.load_df(net.export_df().round(2))
net.widget()